FMP AudioLabs
C3

Log-Frequency Spectrogram


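Following Section 3.1.1 of [Müller, FMP, Springer 2015], we explain how to convert an STFT-based spectrogram into a log-frequency spectrogram and how to derive a chromagram from it.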

Librosa, STFT, zero-padding

Reference frequency

Fig. 3.3 Table 3.1 Fig. 3.4 Fig. 3.5

Maybe discussion of "tuning"

Maybe discussion of "compression"

In [1]:
import os

import numpy as np
import scipy
import matplotlib
from matplotlib import pyplot as plt
import librosa
import pandas as pd
import IPython.display as ipd
import music21 as m21

%matplotlib inline

Fig. 3.3

In [2]:
# Load the audio excerpt for Fig. 3.3 and embed a player for it.
# No `sr` argument is given, so Fs is whatever rate librosa.load returns.

fn_wav = os.path.join('..', 'data', 'C3', 'FMP_C3_F03.wav')
x, Fs = librosa.load(fn_wav)
ipd.Audio(data=x, rate=Fs)
Out[2]:
In [3]:
# compute stft

# Window / FFT length in samples. (A preceding `N = 2048` was overwritten
# immediately and never used, so it has been dropped.)
N = 4096 * 4
# Hop size in samples between consecutive frames.
H = 512

w = scipy.signal.get_window('hann', N)

# `center` is not passed, so librosa's default framing is used.
X = librosa.stft(x, n_fft=N, hop_length=H, win_length=N, window=w, pad_mode='constant')

# Time stamp (seconds) of every frame. `n_fft` is deliberately NOT passed:
# frames_to_time would then add an n_fft // 2 sample offset, which is meant for
# non-centered STFTs and would shift this axis by N / (2 * Fs) seconds.
t = librosa.frames_to_time(np.arange(X.shape[1]), sr=Fs, hop_length=H)
# Frequency (Hz) of every STFT bin.
freq = librosa.fft_frequencies(sr=Fs, n_fft=N)
In [4]:
# plot stft

fig = plt.figure(figsize=(15, 5))
ax = plt.gca()

# Image borders: full time span and full frequency span of the STFT.
left, right = min(t), max(t)
lower, upper = min(freq), max(freq)

# Logarithmic compression of the power spectrogram; eps avoids log10(0).
eps = np.finfo(float).eps
Y = 20 * np.log10(eps + np.abs(X) ** 2)

im = ax.imshow(Y, origin='lower', aspect='auto', cmap='gray_r', extent=[left, right, lower, upper])
im.set_clim(-30, 30)
ax.set_ylim(0, 4500)
ax.set_xlabel('Time (seconds)')
ax.set_ylabel('Frequency (Hz)')
fig.colorbar(im, ax=ax)

# Red outline around the one-second slice that starts at t = 30 s.
rect = matplotlib.patches.Rectangle((30, 0.5), 1, 4490, linewidth=2, edgecolor='r', facecolor='none')
ax.add_patch(rect)
Out[4]:
<matplotlib.patches.Rectangle at 0x27ab0d307f0>
In [5]:
# pitch table

def F_pitch(p, a4_ref=440):
    """Return the center frequency of pitch number ``p``.

    Pitch 69 is mapped to ``a4_ref``; every step of 12 in ``p`` doubles the
    frequency. ``p`` may be fractional (e.g. ``p - 0.5`` for a band edge).

    Args:
        p: Pitch number (scalar or array-like supporting arithmetic).
        a4_ref: Frequency assigned to pitch 69.

    Returns:
        Frequency in the same unit as ``a4_ref``.
    """
    semitones_from_ref = p - 69
    octaves_from_ref = semitones_from_ref / 12
    return a4_ref * 2 ** octaves_from_ref

# One table row per pitch number from 60 to 72 (inclusive).
note_infos = []
for p in range(60, 73):
    name = m21.note.Note(p).pitch.unicodeNameWithOctave
    p_pitch = F_pitch(p)
    # Band edges: half a pitch step below and above the center frequency.
    p_pitch_lower = F_pitch(p - 0.5)
    p_pitch_upper = F_pitch(p + 0.5)
    bw = p_pitch_upper - p_pitch_lower
    note_infos.append([name, p, p_pitch, p_pitch_lower, p_pitch_upper, bw])

# Raw strings: `\m` in `\mathrm` is not a valid Python escape sequence and
# triggers a DeprecationWarning/SyntaxWarning in ordinary string literals.
# The resulting column names are unchanged.
df = pd.DataFrame(
    note_infos,
    columns=[
        'Note',
        '$p$',
        r'$F_\mathrm{pitch}(p)$',
        r'$F_\mathrm{pitch}(p-0.5)$',
        r'$F_\mathrm{pitch}(p+0.5)$',
        '$BW(p)$',
    ],
)
html = df.to_html(index=False, float_format='%.2f')
# Constrain the rendered table to two thirds of the cell width.
html = html.replace('<table', '<table style="width: 66%"')
ipd.HTML(html)
Out[5]:
Note $p$ $F_\mathrm{pitch}(p)$ $F_\mathrm{pitch}(p-0.5)$ $F_\mathrm{pitch}(p+0.5)$ $BW(p)$
C♮4 60 261.63 254.18 269.29 15.11
C♯4 61 277.18 269.29 285.30 16.01
D♮4 62 293.66 285.30 302.27 16.97
E♭4 63 311.13 302.27 320.24 17.97
E♮4 64 329.63 320.24 339.29 19.04
F♮4 65 349.23 339.29 359.46 20.18
F♯4 66 369.99 359.46 380.84 21.37
G♮4 67 392.00 380.84 403.48 22.65
G♯4 68 415.30 403.48 427.47 23.99
A♮4 69 440.00 427.47 452.89 25.42
B♭4 70 466.16 452.89 479.82 26.93
B♮4 71 493.88 479.82 508.36 28.53
C♮5 72 523.25 508.36 538.58 30.23
In [6]:
# definition of Y_lf

def F_coef(k, Fs, N):
    """Return the frequency associated with coefficient index ``k``.

    Args:
        k: Coefficient index (scalar or array).
        Fs: Sampling rate.
        N: Transform length.

    Returns:
        ``k * Fs / N``, evaluated left to right.
    """
    scaled_index = k * Fs
    return scaled_index / N

def P(p, Fs, N, a4_ref=440):
    """Return the coefficient indices whose frequency falls into pitch ``p``.

    An index ``k`` is selected when
    ``F_pitch(p - 0.5) <= F_coef(k) < F_pitch(p + 0.5)``.

    Args:
        p: Pitch number.
        Fs: Sampling rate.
        N: Transform length.
        a4_ref: Frequency assigned to pitch 69. It is forwarded to ``F_pitch``
            so that a non-default reference actually moves the band edges
            (previously the argument was accepted but ignored).

    Returns:
        1-D integer array of matching indices from ``0 .. N // 2``; empty if no
        coefficient lies in the band.
    """
    lower = F_pitch(p - 0.5, a4_ref)
    upper = F_pitch(p + 0.5, a4_ref)
    # Include index N // 2 as well: a one-sided spectrum of length N has
    # N // 2 + 1 coefficients, and the last one was previously unreachable.
    k = np.arange(N // 2 + 1)
    k_freq = F_coef(k, Fs, N)
    mask = np.logical_and(lower <= k_freq, k_freq < upper)
    return k[mask]

def get_Y_lf(Y, Fs, N, a4_ref=440):
    """Pool the rows of ``Y`` into 128 pitch bands.

    For each pitch number 0..127 the rows of ``Y`` selected by
    ``P(p, Fs, N, a4_ref)`` are summed.

    Args:
        Y: 2-D array indexed as ``[coefficient, frame]``.
        Fs: Sampling rate, passed on to ``P``.
        N: Transform length, passed on to ``P``.
        a4_ref: Reference frequency, passed on to ``P``.

    Returns:
        Array of shape ``(128, Y.shape[1])``; a band with no matching
        coefficients stays zero.
    """
    num_frames = Y.shape[1]
    Y_lf = np.zeros((128, num_frames))
    # Each `band` is a writable view of one row of Y_lf.
    for pitch, band in enumerate(Y_lf):
        bins = P(pitch, Fs, N, a4_ref)
        band[:] = Y[bins, :].sum(axis=0)
    return Y_lf
In [7]:
# computation of Y_lf

# Squared magnitude of the STFT coefficients.
Y = np.abs(X) ** 2
# Sum the coefficients of Y into 128 pitch bands (see get_Y_lf).
Y_lf = get_Y_lf(Y, Fs, N)
In [8]:
# plotting of Y_lf

fig = plt.figure(figsize=(15, 5))

left = min(t)
right = max(t)
# Row p of Y_lf holds pitch p. With image edges at -0.5 and 127.5 the center of
# row p sits exactly at y = p, so integer ticks label the pitch bands
# themselves. (With edges 0 and 128 row p covered [p, p + 1] and every tick
# pointed at a band boundary.)
lower = -0.5
upper = 127.5

# eps (defined with the STFT plot) avoids log10(0) in silent bands.
plt.imshow(20 * np.log10(eps + Y_lf), origin='lower', aspect='auto', cmap='gray_r', extent=[left, right, lower, upper])
plt.clim([-30, 30])
# Same rows as before (21 .. 107), shifted by the same half step as the image.
plt.ylim([20.5, 107.5])
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency (pitch)')
plt.colorbar()

# Create a Rectangle patch around the one-second slice starting at t = 30 s
# (shifted by the same half step as the image).
rect = matplotlib.patches.Rectangle((30, 0.0), 1, 127, linewidth=2, edgecolor='r', facecolor='none')
plt.gca().add_patch(rect)
Out[8]:
<matplotlib.patches.Rectangle at 0x27a80592390>
In [9]:
# definition of C

def get_C(Y_lf):
    """Fold a 128-band pitch representation into 12 chroma bands.

    Chroma band ``c`` is the sum of all rows ``p`` of ``Y_lf`` with
    ``p % 12 == c``.

    Args:
        Y_lf: 2-D array with 128 rows, indexed as ``[pitch, frame]``.

    Returns:
        Array of shape ``(12, Y_lf.shape[1])``.
    """
    pitch_class = np.arange(128) % 12
    num_frames = Y_lf.shape[1]
    C = np.zeros((12, num_frames))
    # Each `chroma_row` is a writable view of one row of C.
    for chroma, chroma_row in enumerate(C):
        members = pitch_class == chroma
        chroma_row[:] = Y_lf[members, :].sum(axis=0)
    return C
In [10]:
# computation of C

# Sum the 128 pitch bands of Y_lf into 12 chroma bands (see get_C).
C = get_C(Y_lf)
In [11]:
# plotting of C

fig = plt.figure(figsize=(15, 5))
ax = plt.gca()

# Image borders: full time span; chroma row c covers [c, c + 1] on the y axis.
left, right = min(t), max(t)
lower, upper = 0, 12

im = ax.imshow(20 * np.log10(eps + C), origin='lower', aspect='auto', cmap='gray_r', extent=[left, right, lower, upper])
im.set_clim(0, 100)
ax.set_xlabel('Time (seconds)')
ax.set_ylabel('Chroma')
fig.colorbar(im, ax=ax)

# One tick per chroma band, placed at the band center and labelled by note name.
chroma_names = [m21.note.Note(c).pitch.unicodeName for c in range(12)]
ax.set_yticks(np.arange(12) + 0.5)
ax.set_yticklabels(chroma_names)

# Red outline around the one-second slice that starts at t = 30 s.
rect = matplotlib.patches.Rectangle((30, 0.0), 1, 12, linewidth=2, edgecolor='r', facecolor='none')
ax.add_patch(rect)
Out[11]:
<matplotlib.patches.Rectangle at 0x27a80f35668>

Fig. 3.5

In [12]:
# Load the audio excerpt for Fig. 3.5 and embed a player for it.
# No `sr` argument is given, so Fs is whatever rate librosa.load returns.

fn_wav = os.path.join('..', 'data', 'C3', 'FMP_C3_F05.wav')
x, Fs = librosa.load(fn_wav)
ipd.Audio(data=x, rate=Fs)
Out[12]:
In [13]:
# compute stft

# Window / FFT length in samples. (A preceding `N = 2048` was overwritten
# immediately and never used, so it has been dropped.)
N = 4096
# Hop size in samples between consecutive frames.
H = 512

w = scipy.signal.get_window('hann', N)

# `center` is not passed, so librosa's default framing is used.
X = librosa.stft(x, n_fft=N, hop_length=H, win_length=N, window=w, pad_mode='constant')

# Time stamp (seconds) of every frame. `n_fft` is deliberately NOT passed:
# frames_to_time would then add an n_fft // 2 sample offset, which is meant for
# non-centered STFTs and would shift this axis by N / (2 * Fs) seconds.
t = librosa.frames_to_time(np.arange(X.shape[1]), sr=Fs, hop_length=H)
# Frequency (Hz) of every STFT bin.
freq = librosa.fft_frequencies(sr=Fs, n_fft=N)

# computation of Y_lf

# Squared magnitude of the STFT coefficients, pooled into 128 pitch bands.
Y = np.abs(X) ** 2
Y_lf = get_Y_lf(Y, Fs, N)

# computation of C

# Pitch bands folded into 12 chroma bands.
C = get_C(Y_lf)
In [14]:
fig = plt.figure(figsize=(15, 10))

# plot log-frequency spectrogram (Y_lf)
plt.subplot(2, 1, 1)

left = min(t)
right = max(t)
# Row p of Y_lf holds pitch p. With image edges at -0.5 and 127.5 the center of
# row p sits exactly at y = p, so integer ticks label the pitch bands
# themselves. (With edges 0 and 128 row p covered [p, p + 1] and every tick
# pointed at a band boundary.)
lower = -0.5
upper = 127.5

# eps (defined with the STFT plot) avoids log10(0) in silent bands.
plt.imshow(20 * np.log10(eps + Y_lf), origin='lower', aspect='auto', cmap='gray_r', extent=[left, right, lower, upper])
plt.clim([20, 50])
# Same rows as before (55 .. 91), shifted by the same half step as the image.
plt.ylim([54.5, 91.5])
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency (pitch)')
plt.colorbar()

# Create a Rectangle patch around the one-second slice starting at t = 30 s
# (shifted by the same half step as the image).
rect = matplotlib.patches.Rectangle((30, 0.0), 1, 127, linewidth=2, edgecolor='r', facecolor='none')
plt.gca().add_patch(rect)

# plotting of C
plt.subplot(2, 1, 2)

# Chroma row c covers [c, c + 1]; the ticks below are placed at band centers.
left = min(t)
right = max(t)
lower = 0
upper = 12

plt.imshow(20 * np.log10(eps + C), origin='lower', aspect='auto', cmap='gray_r', extent=[left, right, lower, upper])
plt.clim([20, 80])
plt.xlabel('Time (seconds)')
plt.ylabel('Chroma')
plt.colorbar()
plt.yticks(np.arange(12) + 0.5, [m21.note.Note(c).pitch.unicodeName for c in range(12)])

# Create a Rectangle patch around the one-second slice starting at t = 30 s.
rect = matplotlib.patches.Rectangle((30, 0.0), 1, 12, linewidth=2, edgecolor='r', facecolor='none')
plt.gca().add_patch(rect)

plt.tight_layout()

Acknowledgment: This notebook was created by Frank Zalkow and Meinard Müller.

C0 C1 C2 C3 C4 C5 C6 C7 C8